1. load the data
library(readr)
library(ggplot2)
df <- read.csv("MI.data", header = FALSE)
colnames(df) <- c("ID", "AGE", "SEX", "INF_ANAM", "STENOK_AN", "FK_STENOK", "IBS_POST", "IBS_NASL", "GB", "SIM_GIPERT", "DLIT_AG", "ZSN_A", "nr_11", "nr_01", "nr_02", "nr_03", "nr_04", "nr_07", "nr_08", "np_01", "np_04", "np_05", "np_07", "np_08", "np_09", "np_10", "endocr_01", "endocr_02", "endocr_03", "zab_leg_01", "zab_leg_02", "zab_leg_03", "zab_leg_04", "zab_leg_06", "S_AD_KBRIG", "D_AD_KBRIG", "S_AD_ORIT", "D_AD_ORIT", "O_L_POST", "K_SH_POST", "MP_TP_POST", "SVT_POST", "GT_POST", "FIB_G_POST", "ant_im", "lat_im", "inf_im", "post_im", "IM_PG_P", "ritm_ecg_p_01", "ritm_ecg_p_02", "ritm_ecg_p_04", "ritm_ecg_p_06", "ritm_ecg_p_07", "ritm_ecg_p_08", "n_r_ecg_p_01", "n_r_ecg_p_02", "n_r_ecg_p_03", "n_r_ecg_p_04", "n_r_ecg_p_05", "n_r_ecg_p_06", "n_r_ecg_p_08", "n_r_ecg_p_09", "n_r_ecg_p_10", "n_p_ecg_p_01", "n_p_ecg_p_03", "n_p_ecg_p_04", "n_p_ecg_p_05", "n_p_ecg_p_06", "n_p_ecg_p_07", "n_p_ecg_p_08", "n_p_ecg_p_09", "n_p_ecg_p_10", "n_p_ecg_p_11", "n_p_ecg_p_12", "fibr_ter_01", "fibr_ter_02", "fibr_ter_03", "fibr_ter_05", "fibr_ter_06", "fibr_ter_07", "fibr_ter_08", "GIPO_K", "K_BLOOD", "GIPER_NA", "NA_BLOOD", "ALT_BLOOD", "AST_BLOOD", "KFK_BLOOD", "L_BLOOD", "ROE", "TIME_B_S", "R_AB_1_n", "R_AB_2_n", "R_AB_3_n", "NA_KB", "NOT_NA_KB", "LID_KB", "NITR_S", "NA_R_1_n", "NA_R_2_n", "NA_R_3_n", "NOT_NA_1_n", "NOT_NA_2_n", "NOT_NA_3_n", "LID_S_n", "B_BLOK_S_n", "ANT_CA_S_n", "GEPAR_S_n", "ASP_S_n", "TIKL_S_n", "TRENT_S_n", "FIBR_PREDS", "PREDS_TAH", "JELUD_TAH", "FIBR_JELUD", "A_V_BLOK", "OTEK_LANC", "RAZRIV", "DRESSLER", "ZSN", "REC_IM", "P_IM_STEN", "LET_IS")
df
  1. data cleaning
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df <- data.frame(lapply(df, function(x) {
  # 将整数列转换为字符型,以处理 "?"
  x <- as.character(x)
  # 将 "?" 替换为 NA
  x[x == "?"] <- NA
  # 再将列转换回整数型
  as.integer(x)
}))
selected_vars <- df %>%
  select(AGE, SEX, L_BLOOD, DLIT_AG, endocr_01, endocr_02, ROE, fibr_ter_01, fibr_ter_05, fibr_ter_08, B_BLOK_S_n, ant_im,lat_im,inf_im,post_im)

# 查看选定变量的摘要统计信息
summary(selected_vars)
##       AGE             SEX            L_BLOOD          DLIT_AG    
##  Min.   :26.00   Min.   :0.0000   Min.   : 2.000   Min.   :0.00  
##  1st Qu.:54.00   1st Qu.:0.0000   1st Qu.: 6.000   1st Qu.:0.00  
##  Median :63.00   Median :1.0000   Median : 8.000   Median :3.00  
##  Mean   :61.86   Mean   :0.6265   Mean   : 8.337   Mean   :3.34  
##  3rd Qu.:70.00   3rd Qu.:1.0000   3rd Qu.:10.000   3rd Qu.:7.00  
##  Max.   :92.00   Max.   :1.0000   Max.   :27.000   Max.   :7.00  
##  NA's   :8                        NA's   :125      NA's   :248   
##    endocr_01       endocr_02            ROE          fibr_ter_01      
##  Min.   :0.000   Min.   :0.00000   Min.   :  1.00   Min.   :0.000000  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:  5.00   1st Qu.:0.000000  
##  Median :0.000   Median :0.00000   Median : 10.00   Median :0.000000  
##  Mean   :0.135   Mean   :0.02485   Mean   : 13.44   Mean   :0.007692  
##  3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.: 18.00   3rd Qu.:0.000000  
##  Max.   :1.000   Max.   :1.00000   Max.   :140.00   Max.   :1.000000  
##  NA's   :11      NA's   :10        NA's   :203      NA's   :10        
##   fibr_ter_05        fibr_ter_08         B_BLOK_S_n         ant_im     
##  Min.   :0.000000   Min.   :0.000000   Min.   :0.0000   Min.   :0.000  
##  1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.000  
##  Median :0.000000   Median :0.000000   Median :0.0000   Median :1.000  
##  Mean   :0.002367   Mean   :0.001183   Mean   :0.1273   Mean   :1.571  
##  3rd Qu.:0.000000   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:4.000  
##  Max.   :1.000000   Max.   :1.000000   Max.   :1.0000   Max.   :4.000  
##  NA's   :10         NA's   :10         NA's   :11       NA's   :83     
##      lat_im           inf_im         post_im      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.000   Median :0.0000  
##  Mean   :0.8617   Mean   :1.015   Mean   :0.2592  
##  3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:0.0000  
##  Max.   :4.0000   Max.   :4.000   Max.   :4.0000  
##  NA's   :80       NA's   :80      NA's   :72
  1. EDA
#LET_IS变量中不同数值的frequency
df$Status <- ifelse(df$LET_IS == 0, "Alive", "Deceased")
df$Status <- as.factor(df$Status)
df$LET_IS <- as.factor(df$LET_IS)
ggplot(df, aes(x = LET_IS)) +
  geom_bar() +
  labs(title = "Frequency Distribution of LET_IS Values",
       x = "LET_IS Values",
       y = "Frequency") +
  theme_minimal()

ggplot(df, aes(x = Status)) +
  geom_bar() +
  labs(title = "Frequency Distribution of Status",
       x = "Status",
       y = "Frequency") +
  theme_minimal()

#不同年龄与性别的患者心梗及其并发症的频率分布
library(ggplot2)
df$SEX <- as.factor(df$SEX)
df$LET_IS <- as.factor(df$LET_IS)
ggplot(df, aes(x = AGE, fill = SEX)) +
  geom_bar(position = "dodge") +
  facet_wrap(~ LET_IS, scales = "free") +
  labs(title = "Distribution of Patients by Age and Gender for Different Lethal outcome",
       x = "Age Group",
       y = "Count",
       fill = "Gender") +
  theme_minimal()+
  theme(axis.text.x = element_text(size = 5))
## Warning: Removed 8 rows containing non-finite values (`stat_count()`).

#不同gender的存活/死亡的比率
df$SEX <- as.factor(df$SEX)

ggplot(df, aes(x = SEX, fill = Status)) +
  geom_bar() +
  labs(title = "Stacked Frequency Distribution of Survival Status by Gender",
       x = "Gender",
       y = "Count",
       fill = "Status") +
  theme_minimal()

#不同的病史中心肌梗塞的数量导致的存活/死亡的比率
df_no_na <- df[!is.na(df$INF_ANAM), ]
ggplot(df_no_na, aes(x = factor(INF_ANAM), y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Survival Status by Quantity of myocardial infarctions in the anamnesis. 0: zero 1: one 2: two 3: three and more)",
       x = "Quantity of myocardial infarctions",
       y = "Proportion") +
  theme_minimal()
## Warning: The dot-dot notation (`..prop..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(prop)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#是否高血压导致的存活/死亡的比率
df_no_na <- df[!is.na(df$SIM_GIPERT), ]
ggplot(df_no_na, aes(x = factor(SIM_GIPERT), y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Survival Status by Symptomatic hypertension",
       x = "Survival Status by Symptomatic hypertension",
       y = "Proportion") +
  theme_minimal()

#不同的既往史中有劳力性心绞痛导致的存活/死亡的比率
df_no_na <- df[!is.na(df$STENOK_AN), ]
ggplot(df_no_na, aes(x = factor(STENOK_AN), y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Survival Status by Exertional angina pectoris in the anamnesis(0: never 1: during the last year 2: one year ago 3: two years ago 4: three years ago 5: 4-5 years ago)",
       x = "Exertional angina pectoris in the anamnesis",
       y = "Proportion") +
  theme_minimal()

#不同的年龄导致的存活/死亡的比率
df_no_na <- df[!is.na(df$AGE), ]
ggplot(df_no_na, aes(x = AGE, y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of AGE",
       x = "AGE",
       y = "Proportion") +
  theme_minimal()+
  theme(axis.text.x = element_text(size = 5))

#不同的红细胞沉降率导致的存活/死亡的比率
df_no_na <- df[!is.na(df$ROE), ]
ggplot(df_no_na, aes(x = ROE, y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Erythrocyte sedimentation rate",
       x = "ROE",
       y = "Proportion") +
  theme_minimal()

ggplot(df_no_na, aes(x = factor(Status), y = ROE, fill = factor(Status))) +
  geom_violin(trim = FALSE) +
  labs(title = "Violin Plot of Erythrocyte sedimentation rate by Status",
       x = "Status",
       y = "ROE") +
  theme_minimal()

#不同的白细胞计数导致的存活/死亡的比率
ggplot(df, aes(x = L_BLOOD, y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of White cell count",
       x = "White cell count",
       y = "Proportion") +
  theme_minimal()
## Warning: Removed 125 rows containing non-finite values (`stat_count()`).

ggplot(df_no_na, aes(x = factor(Status), y = L_BLOOD, fill = factor(Status))) +
  geom_violin(trim = FALSE) +
  labs(title = "Violin Plot of White cell count by Status",
       x = "Status",
       y = "L_BLOOD") +
  theme_minimal()
## Warning: Removed 5 rows containing non-finite values (`stat_ydensity()`).

#糖尿病导致的存活/死亡的比率
df_no_na <- df[!is.na(df$endocr_01), ]
ggplot(df_no_na, aes(x = factor(endocr_01), y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Survival Status by Diabetes",
       x = "Diabetes mellitus in the anamnesis",
       y = "Proportion") +
  theme_minimal()

#肥胖导致的存活/死亡的比率
df_no_na <- df[!is.na(df$endocr_02  ), ]
ggplot(df_no_na, aes(x = factor(endocr_02), y = ..prop.., group = Status, fill = Status)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Stacked Proportion Distribution of Survival Status by Obesity",
       x = "Obesity",
       y = "Proportion") +
  theme_minimal()

#每个疗法下的存活/死亡比率
library(tidyr)
library(ggplot2)
library(dplyr)
variables <- c("fibr_ter_01", "fibr_ter_02", "fibr_ter_03", 
               "fibr_ter_05", "fibr_ter_06", "fibr_ter_07", "fibr_ter_08")
df_clean <- df[complete.cases(df[, variables]), ]
# 将数据转换为长格式,以便每种治疗都有对应的条目
df_long <- df_clean %>%
  pivot_longer(
    cols = fibr_ter_01:fibr_ter_08, # 选择所有治疗变量
    names_to = "Therapy",
    values_to = "Treatment_Received"
  ) %>%  
  mutate(
    Treatment_Received = factor(Treatment_Received, levels = c(0, 1), labels = c("No", "Yes"))
  )

# 使用ggplot2绘制分面条形图
ggplot(df_long, aes(x = Treatment_Received, fill = Status)) +
  geom_bar(position = "fill") +
  facet_wrap(~ Therapy, scales = "free_x") + # 每种治疗一个小面
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Survival Rate Proportion by Treatment",
       x = "Treatment Received",
       y = "Proportion",
       fill = "Status") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), # 旋转x轴标签以便于阅读
        strip.text.x = element_text(angle = 0))  # 确保小面的标题水平显示

4. Logistic regression

#null model
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
df$Status <- factor(df$Status, levels = c("Alive", "Deceased"))

fit_null <- glm(Status ~ 1, data = df, family = binomial(link = "logit"))
summary(fit_null)
## 
## Call:
## glm(formula = Status ~ 1, family = binomial(link = "logit"), 
##     data = df)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.66261    0.06626  -25.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1491.6  on 1699  degrees of freedom
## Residual deviance: 1491.6  on 1699  degrees of freedom
## AIC: 1493.6
## 
## Number of Fisher Scoring iterations: 3
#Confusion MAtrix

predicted_probs <- predict(fit_null, newdata = df, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(df$Status))
cm <- confusionMatrix(predicted_classes, df$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive     1429      271
##   Deceased     0        0
##                                           
##                Accuracy : 0.8406          
##                  95% CI : (0.8223, 0.8577)
##     No Information Rate : 0.8406          
##     P-Value [Acc > NIR] : 0.5162          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.8406          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.8406          
##          Detection Rate : 0.8406          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : Alive           
## 
# ROC
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## 
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
roc_obj <- roc(df$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the null model")
abline(0, 1, lty = 2, col = "gray") 

auc_value <- auc(roc_obj)
cat("AUC:", auc_value, "\n")
## AUC: 0.5
#logistic model
library(tidyverse)
library(caret)
df_all <- as.data.frame(lapply(df, function(x) {
    if(is.numeric(x)) {
        x[is.na(x)] <- median(x, na.rm = TRUE)
    }
    return(x)
})
)
set.seed(625)
indices <- createDataPartition(df_all$Status, p = 0.8, list = FALSE)
train_set <- df_all[indices, ]
test_set <- df_all[-indices, ]

fit_all = glm(Status ~ AGE + factor(SEX) +L_BLOOD + factor(DLIT_AG) + factor(endocr_01) + factor(endocr_02)  + ROE +
              factor(fibr_ter_01) + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) + ant_im + lat_im + inf_im + post_im, 
              data = train_set, 
              family = binomial(link = "logit"))

#Confusion Matrix
predicted_probs <- predict(fit_all, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(df$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      277       51
##   Deceased     8        3
##                                           
##                Accuracy : 0.826           
##                  95% CI : (0.7813, 0.8648)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.7945          
##                                           
##                   Kappa : 0.0406          
##                                           
##  Mcnemar's Test P-Value : 4.553e-08       
##                                           
##             Sensitivity : 0.97193         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84451         
##          Neg Pred Value : 0.27273         
##              Prevalence : 0.84071         
##          Detection Rate : 0.81711         
##    Detection Prevalence : 0.96755         
##       Balanced Accuracy : 0.51374         
##                                           
##        'Positive' Class : Alive           
## 
# ROC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the null model")
abline(0, 1, lty = 2, col = "gray") 

auc_value <- auc(roc_obj)
cat("AUC:", auc_value, "\n")
## AUC: 0.7033788
# Test for the collinearity
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
vif(fit_all)
##                         GVIF Df GVIF^(1/(2*Df))
## AGE                 1.231701  1        1.109820
## factor(SEX)         1.303317  1        1.141629
## L_BLOOD             1.033607  1        1.016664
## factor(DLIT_AG)     1.225596  7        1.014637
## factor(endocr_01)   1.123386  1        1.059899
## factor(endocr_02)   1.046852  1        1.023158
## ROE                 1.076991  1        1.037782
## factor(fibr_ter_01) 1.033340  1        1.016533
## factor(fibr_ter_05) 1.000000  1        1.000000
## factor(fibr_ter_08) 1.000000  1        1.000000
## factor(B_BLOK_S_n)  1.063993  1        1.031500
## ant_im              1.914410  1        1.383622
## lat_im              1.504295  1        1.226497
## inf_im              1.602693  1        1.265975
## post_im             1.251508  1        1.118708
reduced_model <- step(fit_all, direction="backward")
## Start:  AIC=1072.6
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) + 
##     factor(endocr_02) + ROE + factor(fibr_ter_01) + factor(fibr_ter_05) + 
##     factor(fibr_ter_08) + factor(B_BLOK_S_n) + ant_im + lat_im + 
##     inf_im + post_im
## 
##                       Df Deviance    AIC
## - factor(DLIT_AG)      7   1038.4 1068.4
## - factor(fibr_ter_01)  1   1028.9 1070.9
## - factor(fibr_ter_08)  1   1028.9 1070.9
## - post_im              1   1029.0 1071.0
## - factor(SEX)          1   1029.2 1071.2
## - factor(endocr_01)    1   1029.5 1071.5
## - factor(fibr_ter_05)  1   1030.5 1072.5
## <none>                     1028.6 1072.6
## - ROE                  1   1032.1 1074.1
## - lat_im               1   1033.2 1075.2
## - factor(B_BLOK_S_n)   1   1033.2 1075.2
## - factor(endocr_02)    1   1035.5 1077.5
## - inf_im               1   1039.3 1081.3
## - L_BLOOD              1   1050.9 1092.9
## - ant_im               1   1051.8 1093.8
## - AGE                  1   1062.4 1104.4
## 
## Step:  AIC=1068.43
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) + 
##     ROE + factor(fibr_ter_01) + factor(fibr_ter_05) + factor(fibr_ter_08) + 
##     factor(B_BLOK_S_n) + ant_im + lat_im + inf_im + post_im
## 
##                       Df Deviance    AIC
## - factor(fibr_ter_01)  1   1038.7 1066.7
## - post_im              1   1038.8 1066.8
## - factor(fibr_ter_08)  1   1038.8 1066.8
## - factor(SEX)          1   1039.5 1067.5
## - factor(endocr_01)    1   1039.7 1067.7
## - factor(fibr_ter_05)  1   1040.4 1068.4
## <none>                     1038.4 1068.4
## - ROE                  1   1041.8 1069.8
## - factor(B_BLOK_S_n)   1   1043.1 1071.1
## - lat_im               1   1043.2 1071.2
## - factor(endocr_02)    1   1045.3 1073.3
## - inf_im               1   1049.1 1077.1
## - ant_im               1   1060.8 1088.8
## - L_BLOOD              1   1061.6 1089.6
## - AGE                  1   1076.7 1104.7
## 
## Step:  AIC=1066.73
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) + 
##     ROE + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) + 
##     ant_im + lat_im + inf_im + post_im
## 
##                       Df Deviance    AIC
## - post_im              1   1039.0 1065.0
## - factor(fibr_ter_08)  1   1039.1 1065.1
## - factor(SEX)          1   1039.7 1065.7
## - factor(endocr_01)    1   1040.0 1066.0
## - factor(fibr_ter_05)  1   1040.7 1066.7
## <none>                     1038.7 1066.7
## - ROE                  1   1042.0 1068.0
## - factor(B_BLOK_S_n)   1   1043.2 1069.2
## - lat_im               1   1043.6 1069.6
## - factor(endocr_02)    1   1045.9 1071.9
## - inf_im               1   1049.4 1075.4
## - ant_im               1   1061.2 1087.2
## - L_BLOOD              1   1062.2 1088.2
## - AGE                  1   1076.9 1102.9
## 
## Step:  AIC=1065.04
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) + 
##     ROE + factor(fibr_ter_05) + factor(fibr_ter_08) + factor(B_BLOK_S_n) + 
##     ant_im + lat_im + inf_im
## 
##                       Df Deviance    AIC
## - factor(fibr_ter_08)  1   1039.4 1063.4
## - factor(SEX)          1   1040.1 1064.1
## - factor(endocr_01)    1   1040.2 1064.2
## - factor(fibr_ter_05)  1   1040.9 1064.9
## <none>                     1039.0 1065.0
## - ROE                  1   1042.4 1066.4
## - factor(B_BLOK_S_n)   1   1043.7 1067.7
## - lat_im               1   1044.0 1068.0
## - factor(endocr_02)    1   1046.1 1070.1
## - inf_im               1   1051.1 1075.1
## - ant_im               1   1061.3 1085.3
## - L_BLOOD              1   1062.9 1086.9
## - AGE                  1   1077.0 1101.0
## 
## Step:  AIC=1063.39
## Status ~ AGE + factor(SEX) + L_BLOOD + factor(endocr_01) + factor(endocr_02) + 
##     ROE + factor(fibr_ter_05) + factor(B_BLOK_S_n) + ant_im + 
##     lat_im + inf_im
## 
##                       Df Deviance    AIC
## - factor(SEX)          1   1040.4 1062.4
## - factor(endocr_01)    1   1040.5 1062.5
## - factor(fibr_ter_05)  1   1041.3 1063.3
## <none>                     1039.4 1063.4
## - ROE                  1   1042.8 1064.8
## - factor(B_BLOK_S_n)   1   1044.0 1066.0
## - lat_im               1   1044.4 1066.4
## - factor(endocr_02)    1   1046.4 1068.4
## - inf_im               1   1051.4 1073.4
## - ant_im               1   1061.5 1083.5
## - L_BLOOD              1   1063.3 1085.3
## - AGE                  1   1077.3 1099.3
## 
## Step:  AIC=1062.42
## Status ~ AGE + L_BLOOD + factor(endocr_01) + factor(endocr_02) + 
##     ROE + factor(fibr_ter_05) + factor(B_BLOK_S_n) + ant_im + 
##     lat_im + inf_im
## 
##                       Df Deviance    AIC
## - factor(endocr_01)    1   1042.2 1062.2
## - factor(fibr_ter_05)  1   1042.3 1062.3
## <none>                     1040.4 1062.4
## - ROE                  1   1044.3 1064.3
## - factor(B_BLOK_S_n)   1   1045.1 1065.1
## - lat_im               1   1045.7 1065.7
## - factor(endocr_02)    1   1048.1 1068.1
## - inf_im               1   1052.0 1072.0
## - ant_im               1   1062.2 1082.2
## - L_BLOOD              1   1064.4 1084.4
## - AGE                  1   1087.7 1107.7
## 
## Step:  AIC=1062.21
## Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE + factor(fibr_ter_05) + 
##     factor(B_BLOK_S_n) + ant_im + lat_im + inf_im
## 
##                       Df Deviance    AIC
## - factor(fibr_ter_05)  1   1044.1 1062.1
## <none>                     1042.2 1062.2
## - ROE                  1   1046.9 1064.9
## - factor(B_BLOK_S_n)   1   1047.3 1065.3
## - lat_im               1   1047.6 1065.6
## - factor(endocr_02)    1   1050.3 1068.3
## - inf_im               1   1054.4 1072.4
## - ant_im               1   1064.3 1082.3
## - L_BLOOD              1   1066.0 1084.0
## - AGE                  1   1090.8 1108.8
## 
## Step:  AIC=1062.14
## Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE + factor(B_BLOK_S_n) + 
##     ant_im + lat_im + inf_im
## 
##                      Df Deviance    AIC
## <none>                    1044.1 1062.1
## - ROE                 1   1048.9 1064.9
## - factor(B_BLOK_S_n)  1   1049.2 1065.2
## - lat_im              1   1049.2 1065.2
## - factor(endocr_02)   1   1052.2 1068.2
## - inf_im              1   1056.2 1072.2
## - ant_im              1   1066.0 1082.0
## - L_BLOOD             1   1067.1 1083.1
## - AGE                 1   1092.9 1108.9
summary(reduced_model)
## 
## Call:
## glm(formula = Status ~ AGE + L_BLOOD + factor(endocr_02) + ROE + 
##     factor(B_BLOK_S_n) + ant_im + lat_im + inf_im, family = binomial(link = "logit"), 
##     data = train_set)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -7.122304   0.611701 -11.643  < 2e-16 ***
## AGE                  0.053063   0.007964   6.663 2.68e-11 ***
## L_BLOOD              0.105273   0.021620   4.869 1.12e-06 ***
## factor(endocr_02)1   1.172718   0.389158   3.013  0.00258 ** 
## ROE                  0.015948   0.007155   2.229  0.02582 *  
## factor(B_BLOK_S_n)1 -0.662011   0.314782  -2.103  0.03546 *  
## ant_im               0.279873   0.059886   4.673 2.96e-06 ***
## lat_im               0.229466   0.100733   2.278  0.02273 *  
## inf_im               0.230443   0.065895   3.497  0.00047 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1194.3  on 1360  degrees of freedom
## Residual deviance: 1044.1  on 1352  degrees of freedom
## AIC: 1062.1
## 
## Number of Fisher Scoring iterations: 5
reduced_fit = glm(formula = Status ~ AGE + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) +
    factor(endocr_02) + factor(B_BLOK_S_n) +factor(fibr_ter_05) + ant_im + lat_im + inf_im, family = binomial(link = "logit"), 
    data = df_all)
summary(reduced_fit)
## 
## Call:
## glm(formula = Status ~ AGE + L_BLOOD + factor(DLIT_AG) + factor(endocr_01) + 
##     factor(endocr_02) + factor(B_BLOK_S_n) + factor(fibr_ter_05) + 
##     ant_im + lat_im + inf_im, family = binomial(link = "logit"), 
##     data = df_all)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -7.028532   0.556695 -12.625  < 2e-16 ***
## AGE                    0.052312   0.007287   7.178 7.05e-13 ***
## L_BLOOD                0.104602   0.019637   5.327 9.99e-08 ***
## factor(DLIT_AG)1      -0.676217   0.496733  -1.361  0.17341    
## factor(DLIT_AG)2      -0.093403   0.478675  -0.195  0.84529    
## factor(DLIT_AG)3       0.418651   0.209724   1.996  0.04591 *  
## factor(DLIT_AG)4       0.771002   0.560332   1.376  0.16883    
## factor(DLIT_AG)5      -0.213263   0.419142  -0.509  0.61089    
## factor(DLIT_AG)6       0.725070   0.242464   2.990  0.00279 ** 
## factor(DLIT_AG)7       0.251408   0.199078   1.263  0.20664    
## factor(endocr_01)1     0.321610   0.188145   1.709  0.08738 .  
## factor(endocr_02)1     1.071470   0.357238   2.999  0.00271 ** 
## factor(B_BLOK_S_n)1   -0.650860   0.285223  -2.282  0.02249 *  
## factor(fibr_ter_05)1 -13.451325 410.150615  -0.033  0.97384    
## ant_im                 0.238284   0.054255   4.392 1.12e-05 ***
## lat_im                 0.243534   0.093237   2.612  0.00900 ** 
## inf_im                 0.218676   0.059538   3.673  0.00024 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1491.6  on 1699  degrees of freedom
## Residual deviance: 1296.6  on 1683  degrees of freedom
## AIC: 1330.6
## 
## Number of Fisher Scoring iterations: 13
# this block deals with the data, to generate 2 columns: AgeGroup, and DLIT_AG
selected_df = df_all[, c("Status", "AGE", "L_BLOOD", "DLIT_AG", "endocr_01", "endocr_02", "B_BLOK_S_n","fibr_ter_05",
                         "ant_im", "lat_im", "inf_im")]

selected_df$AgeGroup <- cut(selected_df$AGE,
                     breaks = c(-Inf, 50, 60, 70, Inf),
                     labels = c("<50", "50-60", "60-70", ">70"),
                     right = FALSE)
selected_df$DLIT_AG = factor(selected_df$DLIT_AG)
# conver the data type
selected_df$Status = as.factor(selected_df$Status)
selected_df$endocr_01 = as.factor(selected_df$endocr_01)
selected_df$endocr_02 = as.factor(selected_df$endocr_02)
selected_df$B_BLOK_S_n = as.factor(selected_df$B_BLOK_S_n)
selected_df$fibr_ter_05 = as.factor(selected_df$fibr_ter_05)
# output the dataset
write.csv(selected_df, file = "selected_df.csv", row.names = FALSE)

This section is to do the multilevel logistic regression models

set.seed(119)
# start to do the multilevel logistic regression
selected_df <- read_csv("selected_df.csv")
## Rows: 1700 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Status, AgeGroup
## dbl (10): AGE, L_BLOOD, DLIT_AG, endocr_01, endocr_02, B_BLOK_S_n, fibr_ter_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
selected_df$Status = as.factor(selected_df$Status)
selected_df$endocr_01 = as.factor(selected_df$endocr_01)
selected_df$endocr_02 = as.factor(selected_df$endocr_02)
selected_df$B_BLOK_S_n = as.factor(selected_df$B_BLOK_S_n)
selected_df$fibr_ter_05 = as.factor(selected_df$fibr_ter_05)
indices <- createDataPartition(selected_df$Status, p = 0.8, list = FALSE)
train_set <- selected_df[indices, ]
test_set <- selected_df[-indices, ]
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Complete Pooling(ignore the age group)
m0 <- glm(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im,
            data = train_set,
            family = binomial(link="logit")
          )
print(summary(m0))
## 
## Call:
## glm(formula = Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + 
##     B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im, family = binomial(link = "logit"), 
##     data = train_set)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.53922    0.27601 -12.823  < 2e-16 ***
## L_BLOOD        0.09978    0.02143   4.657 3.21e-06 ***
## DLIT_AG        0.07933    0.02751   2.883  0.00393 ** 
## endocr_011     0.38912    0.20438   1.904  0.05693 .  
## endocr_021     0.72892    0.40777   1.788  0.07384 .  
## B_BLOK_S_n1   -0.87389    0.30937  -2.825  0.00473 ** 
## fibr_ter_051 -13.74471  427.00959  -0.032  0.97432    
## ant_im         0.28027    0.05898   4.752 2.01e-06 ***
## lat_im         0.08733    0.10014   0.872  0.38317    
## inf_im         0.15359    0.06703   2.291  0.02195 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1194.3  on 1360  degrees of freedom
## Residual deviance: 1100.5  on 1351  degrees of freedom
## AIC: 1120.5
## 
## Number of Fisher Scoring iterations: 13
# Confusion Matrix
predicted_probs <- predict(m0, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7169266
library(lme4)
# No Pooling
m <- glm(Status ~ factor(AgeGroup) + L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im,
            data = train_set,
            family = binomial(link="logit")
          )
print(summary(m))
## 
## Call:
## glm(formula = Status ~ factor(AgeGroup) + L_BLOOD + DLIT_AG + 
##     endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + 
##     lat_im + inf_im, family = binomial(link = "logit"), data = train_set)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -4.85796    0.47359 -10.258  < 2e-16 ***
## factor(AgeGroup)>70     1.88568    0.41829   4.508 6.54e-06 ***
## factor(AgeGroup)50-60   0.92494    0.43116   2.145 0.031932 *  
## factor(AgeGroup)60-70   1.51792    0.41227   3.682 0.000232 ***
## L_BLOOD                 0.10476    0.02185   4.794 1.63e-06 ***
## DLIT_AG                 0.03928    0.02855   1.376 0.168920    
## endocr_011              0.29446    0.20643   1.426 0.153729    
## endocr_021              0.83277    0.41524   2.005 0.044911 *  
## B_BLOK_S_n1            -0.64678    0.31360  -2.062 0.039164 *  
## fibr_ter_051          -13.65208  426.39842  -0.032 0.974458    
## ant_im                  0.26827    0.05928   4.525 6.03e-06 ***
## lat_im                  0.13374    0.10268   1.303 0.192744    
## inf_im                  0.15163    0.06726   2.254 0.024165 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1194.3  on 1360  degrees of freedom
## Residual deviance: 1062.8  on 1348  degrees of freedom
## AIC: 1088.8
## 
## Number of Fisher Scoring iterations: 13
# Confusion Matrix
predicted_probs <- predict(m, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7798246
library(lme4)
# Partial Pooling
# multilevel model 1 of random intercept
m1 <- glmer(Status ~ 1 + (1 | AgeGroup),
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
print(summary(m1))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ 1 + (1 | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1163.8   1174.3   -579.9   1159.8     1359 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.5552 -0.4671 -0.3450 -0.2340  4.2735 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  AgeGroup (Intercept) 0.4839   0.6956  
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.9463     0.3644  -5.341 9.26e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Confusion Matrix
predicted_probs <- predict(m1, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      285       54
##   Deceased     0        0
##                                          
##                Accuracy : 0.8407         
##                  95% CI : (0.7973, 0.878)
##     No Information Rate : 0.8407         
##     P-Value [Acc > NIR] : 0.5362         
##                                          
##                   Kappa : 0              
##                                          
##  Mcnemar's Test P-Value : 5.498e-13      
##                                          
##             Sensitivity : 1.0000         
##             Specificity : 0.0000         
##          Pos Pred Value : 0.8407         
##          Neg Pred Value :    NaN         
##              Prevalence : 0.8407         
##          Detection Rate : 0.8407         
##    Detection Prevalence : 1.0000         
##       Balanced Accuracy : 0.5000         
##                                          
##        'Positive' Class : Alive          
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.6943145
# multilevel model 2 of random intercept and random slope of structured variable "endocr_01"
m2 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im 
            + (1 + endocr_01 | AgeGroup),      
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m2))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + endocr_01 |      AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1102.6   1170.4   -538.3   1076.6     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4847 -0.4559 -0.3322 -0.2134  5.5174 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  AgeGroup (Intercept) 0.4236   0.6509        
##           endocr_011  0.1032   0.3212   -1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.72736    0.43579  -8.553  < 2e-16 ***
## L_BLOOD        0.10344    0.02181   4.743 2.11e-06 ***
## DLIT_AG        0.04290    0.02855   1.503   0.1329    
## endocr_011     0.50431    0.31753   1.588   0.1122    
## endocr_021     0.82442    0.41467   1.988   0.0468 *  
## B_BLOK_S_n1   -0.66438    0.31276  -2.124   0.0337 *  
## fibr_ter_051 -15.33771  120.67965  -0.127   0.8989    
## ant_im         0.27048    0.05917   4.571 4.85e-06 ***
## lat_im         0.12806    0.10227   1.252   0.2105    
## inf_im         0.15229    0.06708   2.270   0.0232 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.418                                                        
## DLIT_AG     -0.213 -0.026                                                 
## endocr_011  -0.423 -0.002 -0.115                                          
## endocr_021  -0.020  0.035 -0.096 -0.026                                   
## B_BLOK_S_n1 -0.076  0.037 -0.053  0.062  0.028                            
## fibr_tr_051 -0.001  0.000  0.000  0.000 -0.001  0.000                     
## ant_im      -0.198 -0.073  0.025  0.008 -0.010  0.004  0.000              
## lat_im      -0.146 -0.041  0.053 -0.006  0.005 -0.056  0.000 -0.426       
## inf_im      -0.273 -0.085  0.079 -0.026 -0.049  0.062  0.000  0.443  0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m2, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7767706
# multilevel model 3 of random intercept and random slope of structured variable "endocr_02"
m3 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + 
            (1 + endocr_02| AgeGroup) , 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m3))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + endocr_02 |      AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1103.5   1171.3   -538.8   1077.5     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4570 -0.4534 -0.3341 -0.2148  5.4349 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  AgeGroup (Intercept) 0.399143 0.63178       
##           endocr_021  0.005527 0.07434  -1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.73164    0.42676  -8.744  < 2e-16 ***
## L_BLOOD        0.10451    0.02178   4.798 1.60e-06 ***
## DLIT_AG        0.04319    0.02846   1.517   0.1292    
## endocr_011     0.30262    0.20614   1.468   0.1421    
## endocr_021     0.86283    0.41471   2.081   0.0375 *  
## B_BLOK_S_n1   -0.67208    0.31317  -2.146   0.0319 *  
## fibr_ter_051 -13.83831  775.40205  -0.018   0.9858    
## ant_im         0.26973    0.05921   4.555 5.23e-06 ***
## lat_im         0.12881    0.10219   1.260   0.2075    
## inf_im         0.15173    0.06718   2.259   0.0239 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.425                                                        
## DLIT_AG     -0.230 -0.023                                                 
## endocr_011  -0.054  0.044 -0.167                                          
## endocr_021  -0.088  0.032 -0.098 -0.017                                   
## B_BLOK_S_n1 -0.072  0.037 -0.048  0.075  0.028                            
## fibr_tr_051  0.000  0.000  0.000  0.000  0.000  0.000                     
## ant_im      -0.204 -0.075  0.025 -0.003 -0.010  0.003  0.000              
## lat_im      -0.146 -0.041  0.057 -0.001  0.006 -0.054  0.000 -0.425       
## inf_im      -0.279 -0.086  0.080 -0.054 -0.048  0.062  0.000  0.445  0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m3, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7751462
# multilevel model 4 of random intercept and random slope of structured variable "L_BLOOD"
m4 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02  + B_BLOK_S_n
             + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + L_BLOOD | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m4))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + L_BLOOD | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1103.1   1170.9   -538.5   1077.1     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6474 -0.4527 -0.3318 -0.2151  5.2838 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  AgeGroup (Intercept) 0.1855187 0.4307       
##           L_BLOOD     0.0005381 0.0232   1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.63721    0.38480  -9.452  < 2e-16 ***
## L_BLOOD        0.09333    0.03028   3.082  0.00205 ** 
## DLIT_AG        0.04347    0.02859   1.520  0.12844    
## endocr_011     0.31284    0.20684   1.513  0.13040    
## endocr_021     0.83670    0.41345   2.024  0.04300 *  
## B_BLOK_S_n1   -0.66754    0.31258  -2.136  0.03271 *  
## fibr_ter_051 -16.25381  128.00009  -0.127  0.89895    
## ant_im         0.26966    0.05925   4.551 5.34e-06 ***
## lat_im         0.13073    0.10246   1.276  0.20198    
## inf_im         0.15334    0.06734   2.277  0.02278 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.328                                                        
## DLIT_AG     -0.239 -0.025                                                 
## endocr_011  -0.033 -0.006 -0.162                                          
## endocr_021  -0.017  0.017 -0.097 -0.016                                   
## B_BLOK_S_n1 -0.078  0.014 -0.052  0.075  0.027                            
## fibr_tr_051  0.001 -0.001  0.000  0.000  0.000  0.000                     
## ant_im      -0.224 -0.056  0.025 -0.003 -0.009  0.004  0.000              
## lat_im      -0.154 -0.048  0.053  0.001  0.005 -0.050  0.000 -0.424       
## inf_im      -0.296 -0.084  0.080 -0.051 -0.048  0.065  0.000  0.445  0.138
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m4, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       50
##   Deceased     2        4
##                                           
##                Accuracy : 0.8466          
##                  95% CI : (0.8038, 0.8833)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4182          
##                                           
##                   Kappa : 0.1048          
##                                           
##  Mcnemar's Test P-Value : 7.138e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.07407         
##          Pos Pred Value : 0.84985         
##          Neg Pred Value : 0.66667         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98230         
##       Balanced Accuracy : 0.53353         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7775504
# multilevel model 5 of random intercept and random slope of structured variable "DLIT_AG"
m5 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02  + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + 
            (1 + DLIT_AG | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m5))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + DLIT_AG | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1102.6   1170.4   -538.3   1076.6     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4762 -0.4532 -0.3323 -0.2139  5.8333 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  AgeGroup (Intercept) 0.28198  0.53102      
##           DLIT_AG     0.00177  0.04207  1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.70746    0.38963  -9.515  < 2e-16 ***
## L_BLOOD        0.10532    0.02180   4.831 1.36e-06 ***
## DLIT_AG        0.02004    0.03655   0.548   0.5834    
## endocr_011     0.29987    0.20644   1.453   0.1463    
## endocr_021     0.84672    0.41525   2.039   0.0414 *  
## B_BLOK_S_n1   -0.65772    0.31332  -2.099   0.0358 *  
## fibr_ter_051 -14.31855  962.91266  -0.015   0.9881    
## ant_im         0.27003    0.05929   4.554 5.25e-06 ***
## lat_im         0.12908    0.10218   1.263   0.2065    
## inf_im         0.15475    0.06730   2.299   0.0215 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.466                                                        
## DLIT_AG      0.214 -0.022                                                 
## endocr_011  -0.060  0.044 -0.124                                          
## endocr_021  -0.021  0.032 -0.077 -0.016                                   
## B_BLOK_S_n1 -0.080  0.038 -0.047  0.072  0.029                            
## fibr_tr_051  0.000  0.000  0.000  0.000  0.000  0.000                     
## ant_im      -0.224 -0.075  0.022 -0.003 -0.010  0.003  0.000              
## lat_im      -0.160 -0.041  0.039 -0.001  0.005 -0.051  0.000 -0.423       
## inf_im      -0.307 -0.085  0.060 -0.055 -0.048  0.066  0.000  0.447  0.138
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m5, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7737167
# multilevel model 6 of random intercept and random slope of structured variable "B_BLOK_S_n"
m6 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02  + B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im +
            (1 + B_BLOK_S_n | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m6))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + B_BLOK_S_n |      AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1101.0   1168.8   -537.5   1075.0     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4820 -0.4527 -0.3381 -0.2056  6.9733 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  AgeGroup (Intercept) 0.3456   0.5879       
##           B_BLOK_S_n1 0.5942   0.7708   1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.72806    0.41064  -9.079  < 2e-16 ***
## L_BLOOD        0.10470    0.02175   4.815 1.47e-06 ***
## DLIT_AG        0.04365    0.02845   1.534   0.1249    
## endocr_011     0.30719    0.20600   1.491   0.1359    
## endocr_021     0.82917    0.41419   2.002   0.0453 *  
## B_BLOK_S_n1   -0.99166    0.51093  -1.941   0.0523 .  
## fibr_ter_051 -13.29849  580.59288  -0.023   0.9817    
## ant_im         0.27041    0.05927   4.563 5.05e-06 ***
## lat_im         0.13776    0.10296   1.338   0.1809    
## inf_im         0.15629    0.06734   2.321   0.0203 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.443                                                        
## DLIT_AG     -0.243 -0.022                                                 
## endocr_011  -0.057  0.045 -0.171                                          
## endocr_021  -0.020  0.031 -0.096 -0.016                                   
## B_BLOK_S_n1  0.518  0.024 -0.008  0.058  0.020                            
## fibr_tr_051  0.000  0.000  0.000  0.000  0.000  0.000                     
## ant_im      -0.213 -0.072  0.024 -0.003 -0.008  0.008  0.000              
## lat_im      -0.157 -0.036  0.059  0.002  0.003 -0.027  0.000 -0.423       
## inf_im      -0.295 -0.081  0.080 -0.054 -0.048  0.047  0.000  0.446  0.142
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m6, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7704678
# multilevel model 7 of random intercept and random slope of structured variable "fibr_ter_05"
m7 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + 
            B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + fibr_ter_05 | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(m7))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + fibr_ter_05 |      AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1103.6   1171.4   -538.8   1077.6     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4568 -0.4529 -0.3344 -0.2150  5.4321 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  AgeGroup (Intercept)  0.3978   0.6307        
##           fibr_ter_051 1.1485   1.0717   -0.08
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.73145    0.42809  -8.716  < 2e-16 ***
## L_BLOOD        0.10458    0.02178   4.801 1.57e-06 ***
## DLIT_AG        0.04321    0.02856   1.513   0.1303    
## endocr_011     0.30294    0.20616   1.469   0.1417    
## endocr_021     0.83182    0.41458   2.006   0.0448 *  
## B_BLOK_S_n1   -0.67222    0.31261  -2.150   0.0315 *  
## fibr_ter_051 -20.12832  512.01631  -0.039   0.9686    
## ant_im         0.26957    0.05917   4.556 5.22e-06 ***
## lat_im         0.12852    0.10221   1.257   0.2086    
## inf_im         0.15160    0.06712   2.259   0.0239 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.425                                                        
## DLIT_AG     -0.219 -0.025                                                 
## endocr_011  -0.051  0.043 -0.164                                          
## endocr_021  -0.019  0.032 -0.097 -0.016                                   
## B_BLOK_S_n1 -0.077  0.038 -0.053  0.073  0.027                            
## fibr_tr_051 -0.007  0.003  0.002  0.000  0.001 -0.001                     
## ant_im      -0.202 -0.075  0.026 -0.003 -0.011  0.003  0.002              
## lat_im      -0.149 -0.040  0.052 -0.002  0.005 -0.053  0.001 -0.425       
## inf_im      -0.278 -0.086  0.079 -0.054 -0.049  0.062  0.002  0.445  0.136
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
# Confusion Matrix
predicted_probs <- predict(m7, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7748213
# multilevel model 8 of random intercept and random slope of structured variable "ant_im"
m8 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + 
            B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + ant_im | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m8))
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + ant_im | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1103.5   1171.3   -538.7   1077.5     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4687 -0.4504 -0.3346 -0.2167  5.3295 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  AgeGroup (Intercept) 0.3491161 0.59086      
##           ant_im      0.0003425 0.01851  1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.70878    0.41098  -9.024  < 2e-16 ***
## L_BLOOD        0.10453    0.02179   4.798 1.60e-06 ***
## DLIT_AG        0.04306    0.02848   1.512   0.1305    
## endocr_011     0.30476    0.20626   1.477   0.1395    
## endocr_021     0.83067    0.41411   2.006   0.0449 *  
## B_BLOK_S_n1   -0.66766    0.31326  -2.131   0.0331 *  
## fibr_ter_051 -13.42857  636.57579  -0.021   0.9832    
## ant_im         0.25923    0.06002   4.319 1.57e-05 ***
## lat_im         0.13109    0.10234   1.281   0.2002    
## inf_im         0.15102    0.06717   2.248   0.0246 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.441                                                        
## DLIT_AG     -0.240 -0.024                                                 
## endocr_011  -0.057  0.043 -0.167                                          
## endocr_021  -0.019  0.032 -0.097 -0.016                                   
## B_BLOK_S_n1 -0.075  0.038 -0.049  0.075  0.027                            
## fibr_tr_051  0.000  0.000  0.000  0.000  0.000  0.000                     
## ant_im      -0.091 -0.075  0.030 -0.002 -0.011  0.000  0.000              
## lat_im      -0.152 -0.041  0.056 -0.001  0.005 -0.053  0.000 -0.421       
## inf_im      -0.290 -0.086  0.080 -0.053 -0.049  0.063  0.000  0.439  0.137
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m8, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7754061
# multilevel model 9 of random intercept and random slope of structured variable "lat_im"
m9 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + 
            B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + lat_im | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m9))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + lat_im | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1102.3   1170.1   -538.1   1076.3     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4873 -0.4472 -0.3325 -0.2184  5.3102 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  AgeGroup (Intercept) 0.23837  0.4882       
##           lat_im      0.01853  0.1361   1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.66326    0.38112  -9.612  < 2e-16 ***
## L_BLOOD        0.10463    0.02182   4.796 1.62e-06 ***
## DLIT_AG        0.04292    0.02861   1.500   0.1335    
## endocr_011     0.31099    0.20711   1.502   0.1332    
## endocr_021     0.82254    0.41454   1.984   0.0472 *  
## B_BLOK_S_n1   -0.64449    0.31330  -2.057   0.0397 *  
## fibr_ter_051 -13.30000  147.80185  -0.090   0.9283    
## ant_im         0.26903    0.05926   4.540 5.63e-06 ***
## lat_im         0.07266    0.13514   0.538   0.5908    
## inf_im         0.15141    0.06733   2.249   0.0245 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.478                                                        
## DLIT_AG     -0.249 -0.026                                                 
## endocr_011  -0.053  0.043 -0.163                                          
## endocr_021  -0.026  0.031 -0.097 -0.018                                   
## B_BLOK_S_n1 -0.080  0.041 -0.051  0.078  0.025                            
## fibr_tr_051  0.000  0.000  0.000  0.000  0.001  0.000                     
## ant_im      -0.226 -0.076  0.027 -0.003 -0.011  0.001  0.000              
## lat_im       0.147 -0.028  0.046 -0.012  0.017 -0.059  0.000 -0.324       
## inf_im      -0.317 -0.086  0.080 -0.052 -0.049  0.067  0.000  0.444  0.114
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m9, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7710526
# multilevel model 10 of random intercept and random slope of structured variable "inf_im"
m10 <- glmer(Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + 
            B_BLOK_S_n + fibr_ter_05 + ant_im + lat_im + inf_im + (1 + inf_im | AgeGroup), 
            data = train_set,
            family = binomial(link="logit"),
            nAGQ = 1)
## boundary (singular) fit: see help('isSingular')
print(summary(m10))
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Status ~ L_BLOOD + DLIT_AG + endocr_01 + endocr_02 + B_BLOK_S_n +  
##     fibr_ter_05 + ant_im + lat_im + inf_im + (1 + inf_im | AgeGroup)
##    Data: train_set
## 
##      AIC      BIC   logLik deviance df.resid 
##   1103.4   1171.3   -538.7   1077.4     1348 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4646 -0.4518 -0.3346 -0.2143  5.4287 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr 
##  AgeGroup (Intercept) 0.4286674 0.65473       
##           inf_im      0.0006927 0.02632  -1.00
## Number of obs: 1361, groups:  AgeGroup, 4
## 
## Fixed effects:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -3.74446    0.43910  -8.528  < 2e-16 ***
## L_BLOOD        0.10423    0.02181   4.778 1.77e-06 ***
## DLIT_AG        0.04278    0.02860   1.496   0.1347    
## endocr_011     0.30466    0.20631   1.477   0.1398    
## endocr_021     0.83100    0.41455   2.005   0.0450 *  
## B_BLOK_S_n1   -0.66990    0.31282  -2.141   0.0322 *  
## fibr_ter_051 -19.39913  209.02429  -0.093   0.9261    
## ant_im         0.27067    0.05930   4.565 5.00e-06 ***
## lat_im         0.13051    0.10241   1.274   0.2025    
## inf_im         0.16725    0.08264   2.024   0.0430 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) L_BLOO DLIT_A en_011 en_021 B_BLOK f__051 ant_im lat_im
## L_BLOOD     -0.409                                                        
## DLIT_AG     -0.208 -0.023                                                 
## endocr_011  -0.052  0.042 -0.165                                          
## endocr_021  -0.018  0.033 -0.097 -0.017                                   
## B_BLOK_S_n1 -0.077  0.037 -0.055  0.074  0.028                            
## fibr_tr_051 -0.001  0.000  0.000  0.000 -0.003  0.001                     
## ant_im      -0.201 -0.077  0.023 -0.002 -0.011  0.003  0.000              
## lat_im      -0.150 -0.044  0.049  0.000  0.005 -0.052  0.000 -0.420       
## inf_im      -0.394 -0.099  0.036 -0.029 -0.044  0.064  0.001  0.393  0.144
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Confusion Matrix
predicted_probs <- predict(m10, newdata = test_set, type = "response")
predicted_classes <- ifelse(predicted_probs > 0.5, "Deceased", "Alive")
predicted_classes <- factor(predicted_classes, levels = levels(test_set$Status))
cm <- confusionMatrix(predicted_classes, test_set$Status)
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Deceased
##   Alive      283       51
##   Deceased     2        3
##                                           
##                Accuracy : 0.8437          
##                  95% CI : (0.8005, 0.8806)
##     No Information Rate : 0.8407          
##     P-Value [Acc > NIR] : 0.4771          
##                                           
##                   Kappa : 0.0768          
##                                           
##  Mcnemar's Test P-Value : 4.301e-11       
##                                           
##             Sensitivity : 0.99298         
##             Specificity : 0.05556         
##          Pos Pred Value : 0.84731         
##          Neg Pred Value : 0.60000         
##              Prevalence : 0.84071         
##          Detection Rate : 0.83481         
##    Detection Prevalence : 0.98525         
##       Balanced Accuracy : 0.52427         
##                                           
##        'Positive' Class : Alive           
## 
# ROC and AUC
library(pROC)
roc_obj <- roc(test_set$Status, predicted_probs)
## Setting levels: control = Alive, case = Deceased
## Setting direction: controls < cases
plot(roc_obj, main="ROC Curve for the Model", col="blue")
abline(0, 1, lty = 2, col = "gray")

auc_value <- auc(roc_obj)
cat("AUC for the Model:", auc_value, "\n")
## AUC for the Model: 0.7733268